#!/usr/bin/env python
# -*- coding: UTF-8 -*-
from sklearn.ensemble import IsolationForest
class IForest(object):
    """
    The IsolationForest 'isolates' observations by randomly selecting a feature and then
    randomly selecting a split value between the maximum and minimum values of the selected feature.

    https://cs.nju.edu.cn/zhouzh/zhouzh.files/publication/icdm08b.pdf
    """

    def __init__(self,
                 n_estimators=3,
                 max_samples="auto",
                 contamination=0.15,
                 max_feature=1.,
                 bootstrap=False,
                 n_jobs=1,
                 random_state=None,
                 verbose=0):
        """
        :param n_estimators: The number of base estimators in the ensemble.
        :param max_samples: The number of samples to draw from X to train each base estimator.
        :param coefficient: The amount of contamination of the data set, i.e. the proportion of outliers in the data set. Used when fitting to define the threshold on the decision function.
        :param max_features: The number of features to draw from X to train each base estimator.
        :param bootstrap: If True, individual trees are fit on random subsets of the training data sampled with replacement. If False, sampling without replacement is performed.
        :param random_state: If int, random_state is the seed used by the random number generator;
                              If RandomState instance, random_state is the random number generator;
                              If None, the random number generator is the RandomState instance used  by `np.random`.
        :param verbose: Controls the verbosity of the tree building process.
        """
        self.n_estimators = n_estimators
        self.max_samples = max_samples
        self.contamination = contamination
        self.max_feature = max_feature
        self.bootstrap = bootstrap
        self.n_jobs = n_jobs
        self.random_state = random_state
        self.verbose = verbose

    def predict_score(self, X, window= 12):
        """
        Predict if a particular sample is an outlier or not.

        :param X: the time series to detect of
        :param type X: pandas.Series
        :param window: the length of window
        :param type window: int
        :return: 1 denotes normal, 0 denotes abnormal.
        """
        x_train = list(range(0, 2 * window + 1)) + list(range(0, 2 * window + 1)) + list(range(0, window + 1))
        sample_features = zip(x_train, X)
        clf = IsolationForest(self.n_estimators, self.max_samples, self.contamination, self.max_feature, self.bootstrap, self.n_jobs, self.random_state, self.verbose)
        clf.fit(sample_features)
        scores = clf.decision_function(sample_features)
        return scores[-1]
        
